home *** CD-ROM | disk | FTP | other *** search
Text File | 1994-06-28 | 6.9 KB | 330 lines | [TEXT/MSET] |
- \ Utility subroutines for the String+ class.
- \ Separated from String+ and revised - Aug 87.
- \ Error checking improved - May 88.
- \ Version for Mops - June 89.
-
- 0 value CASE? \ True if case to be significant in comparisons
-
- $ D constant RET \ Carriage return
- 0 value $START \ Addr of start of (chars of) current string.
-
-
- \ ======== TRTBL class ========
-
- \ Translate tables allow very fast searching of strings for specified sets
- \ of characters. In effect we are separating the specification of what we
- \ are searching for from the actual search operation itself. This allows an
- \ uncluttered and extremely fast search operation (the SCAN: and <SCAN: methods
- \ of class STRING+), and it also allows a very flexible (and easily extensible)
- \ choice of what to search for. The setup time for translate tables can
- \ generally be factored out of inner loops, or done at compile time, and is
- \ quite fast, anyway.
- \ We first define a class (trtbl) which is needed to define the table mapping
- \ lower case letters to upper case. This table is then used by some of
- \ the methods in the trtbl class proper.
-
- :class (TRTBL) super{ object }
-
- record
- { int COUNT
- 256 bytes THETBL
- }
-
- :m TBL: addr: theTbl ;m
-
- :m >UC:
- addr: theTbl & A +
- addr: theTbl & a +
- 26 cmove ;m
-
- :mcode TRANSC: \ ( c -- c' ) Translates 1 char using the table.
- MOVE (SP),D0
- MOVE.B 2(A2,D0.W),3(SP)
- ;mcode
-
- ;class
-
-
- (trtbl) UCTBL \ Maps lower case letters to upper case, and
- \ leaves everything else unchanged.
-
- : XX
- 0 tbl: UCtbl 256 bounds
- DO dup i c! 1+ LOOP
- drop >uc: UCtbl ;
-
- xx forget xx
-
- :code (SELC) \ Subroutine used by SELCHAR: and SELCHARNC:.
- ADDQ.W #1,(A2)
- MOVE.W (A2)+,D1
- MOVE.B D1,0(A2,D2.W)
- ;code
-
-
- :class TRTBL super{ (trtbl) }
-
- :mcode CLEAR:
- loc
- CLR.W (A2)+
- MOVEQ #63,D0
- loop CLR (A2)+
- DBRA D0,loop
- ;mcode
-
-
- :m PUT: { addr len -- }
- addr addr: theTbl len 256 min cmove ;m
-
-
- :mcode SELCHARS: \ ( addr len -- )
- loc
- POP D0 ; D0 = len
- POP A1 ; A1 = addr
- ADD D0,A1
- MOVE D0,D1
- ADD.W (A2),D1
- MOVE.W D1,(A2)+
- MOVEQ #0,D2
- BRA.S lptst
-
- loop MOVE.B -(A1),D2
- MOVE.B D1,0(A2,D2.W)
- SUBQ #1,D1
- lptst DBRA D0,loop
- ;mcode
-
-
- :mcode SELCHAR: \ ( c -- )
- POP D2
- BSR dic[(selc)]
- ;mcode
-
-
- :mcode SELCHARNC: \ ( c -- ) "SelChar, no case".
- \ Selects a character, and if it is a letter,
- \ enters the same value in the LC and UC positions of the
- \ table, so that case will in effect be ignored when the
- \ table is used.
- POP D2
- LEA 10(dic[UCtbl]),A0 ; Offset is offs to ^obj, plus 2
- MOVE.B 0(A0,D2.W),D2 ; Convert char to upper case
- BSR dic[(selc)]
- CMPI.B #$41,D2
- BLT.S end
- CMPI.B #$5A,D2
- BGT.S end
- ORI.B #$20,D2
- MOVE.B D1,0(A2,D2.W)
- end
- ;mcode
-
- :mcode SELRANGE: \ ( lo hi -- )
- loc
- ADDQ #2,A2
- POP D0 ; hi
- POP D1 ; lo
- ADD D1,A2
- SUB D1,D0
- BLT.S end
- MOVEQ #1,D2
-
- loop MOVE.B D2,(A2)+
- lptst DBRA D0,loop
- end
- ;mcode
-
- :mcode INVERT:
- loc
- ADDQ #2,A2
- MOVEQ #255,D0
- loop TST.B (A2)
- SEQ (A2)+
- DBRA D0,loop
- ;mcode
-
- ;class
-
-
- \ GETIT is a code subroutine to get the address and length of the active part
- \ of the current string. A2 points to the string object.
- \
- \ Returns:
- \ A0 addr of first char of the active part
- \ D0 length of active part
- \ D2 (lo half) high 16 bits of length - may be used as an outer loop
- \ counter in DBxx loops.
- \ CC result of subtracting POS from LIM to get the length.
- \ $start addr of the start of the whole string
- \
- \ If this length turns out to be negative, $CHK is called to give an error trap.
- \ We don't take a length of zero as an error (there are some situations where
- \ this is quite legitimate). Those operations which don't like a zero
- \ length can call $CHK themselves.
- \ This subroutine must be called from a method, with A2 undisturbed.
- \ Only A0, A2, D0 and D2 are altered.
-
- :code GETIT
- loc
- MOVE (A2),A0 ; A0 = handle
- MOVE (A0),A0 ; Dereference it - addr of start of string
- MOVE A0,dic[$start] ; Leave in $start
- ADD 8(A2),A0 ; Add POS, giving addr of start of active part
- MOVE 12(A2),D0 ; D0 = LIM
- SUB 8(A2),D0 ; Subtract POS, giving length
- MOVE D0,D2
- SWAP D2 ; Hi 16 bits to lo half of D2
- TST D0 ; Test length
- BGE.S end
- JMP dic[$fail] ; If negative, error
- end
- ;code
-
-
- \ CCMP is the primitive subroutine for performing string comparison.
- \ A0 -> string2
- \ A1 -> string1
- \ D0 = length
- \ Assumes length is less than 64K.
- \ Returns with the CC set appropriately.
- \ Uses those registers.
-
- :code CCMP
- loc
- SUBQ #1,D0
- BMI.S equal
- TST dic[case?]
- BEQ.S nocase
-
- loop1 CMPM.B (A0)+,(A1)+
- DBNE D0,loop1
- RTS
-
- equal CMP.W D0,D0
- RTS
-
- nocase MOVEM D2/D3/A2,-(SP)
- MOVEQ #0,D2
- LEA 10(dic[UCtbl]),A2
-
- loop2 CMPM.B (A0)+,(A1)+
- lp2tst DBNE D0,loop2
- BEQ.S end
- MOVE.B -1(A1),D2
- MOVE.B 0(A2,D2.W),D3
- MOVE.B -1(A0),D2
- CMP.B 0(A2,D2.W),D3
- BEQ.S lp2tst
- end MOVEM (SP)+,D2/D3/A2
- ;code
-
-
- \ CSCH and <CSCH are the primitive subroutines for searching for a single
- \ character.
- \ A0 -> string
- \ D0 = length
- \ D2 = length (hi)
- \ D1 = char (rest must be zero)
- \ Both routines return with the CC set appropriately.
-
- :code CSCH
- loc
- TST dic[case?]
- BEQ.S nocase
- BRA.S lp1tst ; Note: we enter the loop with "not equal"
-
- loop1 CMP.B (A0)+,D1
- lp1tst DBEQ D0,loop1
- DBEQ D2,loop1
- RTS
-
- nocase MOVEM D1/D2/A2,-(SP)
- LEA 10(dic[UCtbl]),A2
- MOVE.B 0(A2,D1.W),D1
- MOVEQ #1,D2 ; Set "not equal", clear top 3 bytes of D2
- BRA.S lp2tst
-
- outer MOVE D2,4(SP)
- loop2 MOVE.B (A0)+,D2
- CMP.B 0(A2,D2.W),D1
- lp2tst DBEQ D0,loop2
- MOVEM 4(SP),D2 ; Recover outer loop counter, preserving CC
- DBEQ D2,outer
- MOVEM (SP)+,D1/D2/A2
- end
- ;code
-
- :code <CSCH
- loc
- TST dic[case?]
- BEQ.S nocase
- BRA.S lp1tst ; Note: we enter the loop with "not equal"
-
- loop1 CMP.B -(A0),D1
- lp1tst DBEQ D0,loop1
- DBEQ D2,loop1
- BRA.S end
-
- nocase MOVEM D1/D2/A2,-(SP)
- LEA 10(dic[UCtbl]),A2
- MOVE.B 0(A2,D1.W),D1
- MOVEQ #1,D2 ; Set "not equal", clear top 3 bytes of D2
- BRA.S lp2tst
-
- outer MOVE D2,4(SP)
- loop2 MOVE.B -(A0),D2
- CMP.B 0(A2,D2.W),D1
- lp2tst DBEQ D0,loop2
- MOVEM 4(SP),D2 ; Recover outer loop counter, preserving CC
- DBEQ D2,outer
- MOVEM (SP)+,D1/D2/A2
- end
- ;code
-
-
- \ CMPSTR ( addr1 len1 addr2 len2 -- n ) compares 2 strings.
- \ Case is significant if CASE? is set to true.
- \ Returns:
- \ -1 first string low
- \ 0 strings are equal
- \ 1 first string high
- \ We assume the lengths are both less than 64K.
- \
- \ Uses D0,D1,D2,A0,A1.
-
- :code CMPSTR
- loc
- POP D0 ; D0 = len2
- POP A0 ; A0 = addr2
- POP D1 ; D1 = len1
- MOVE (SP),A1 ; A1 = addr1
- MOVEQ #0,D2 ; D2 will hold return result
- CMP.W D1,D0 ; Compare lengths
- BEQ.S docmp
- BHI.S op2long
- MOVEQ #1,D2
- BRA.S docmp
-
- op2long MOVE.W D1,D0
- MOVEQ #-1,D2
-
- docmp BSR dic[ccmp]
- BEQ.S end
- SMI D2
- ORI.B #1,D2
- EXT.W D2
- EXT.L D2
- end MOVE D2,(SP)
- ;code
-
-
- \ INSTEAD ( c-old c-new -- ) may be used just after a SCON is defined.
- \ Within the SCON, it replaces any occurrences of c-old with c-new. This
- \ operation is useful for creating SCONs containing special characters
- \ such as tab.
-
- : INSTEAD { c-old c-new -- }
- latest name> ex-gen bounds \ SCONs use DOES> so require EX-GEN
- DO i c@ c-old = IF c-new i c! THEN
- LOOP ;
-